# Load the pairwise comparison scores from the homework CSV file.
scores <- read.csv("hw1_list.csv")

# Label each comparison: a pair is "Genuine" when the first six characters of
# the two sequence identifiers agree, and "Imposter" otherwise.
scores$Match <- ifelse(
  substr(scores$sequence1, 1, 6) == substr(scores$sequence2, 1, 6),
  "Genuine",
  "Imposter"
)

# Split the table by label. which() discards rows whose label is NA, which is
# the same thing subset() does with an NA condition.
genuine.scores <- scores[which(scores$Match == "Genuine"), , drop = FALSE]
imposter.scores <- scores[which(scores$Match == "Imposter"), , drop = FALSE]

1) Distribution of Genuine and Imposter Scores

# Overlay density-scaled histograms of the genuine and imposter scores.
# Both histograms are computed first (plot = FALSE) so the axes can be sized
# to cover BOTH distributions; drawing the second one with add = TRUE on axes
# chosen for the first alone would clip or hide it.
genuine.hist <- hist(genuine.scores$score, plot = FALSE)
imposter.hist <- hist(imposter.scores$score, plot = FALSE)
score.xlim <- range(genuine.hist$breaks, imposter.hist$breaks)
density.ylim <- c(0, max(genuine.hist$density, imposter.hist$density))

# Semi-transparent fills keep any overlapping region visible.
genuine.col <- rgb(1, 0, 0, 0.5)
imposter.col <- rgb(0, 0, 1, 0.5)

# freq = FALSE draws densities, so the y axis is labelled accordingly.
plot(
  genuine.hist,
  freq = FALSE,
  col = genuine.col,
  main = "Genuine and Imposter Distribution",
  xlab = "Distance Scores",
  ylab = "Density",
  xlim = score.xlim,
  ylim = density.ylim
)
plot(imposter.hist, freq = FALSE, col = imposter.col, add = TRUE)
legend("top", c("Genuine", "Imposter"), fill = c(genuine.col, imposter.col))

2) D-prime

# Summary statistics of the two score populations.
genuine.mean <- mean(genuine.scores$score)
imposter.mean <- mean(imposter.scores$score)
genuine.sd <- sd(genuine.scores$score)
imposter.sd <- sd(imposter.scores$score)

# d-prime: separation of the two means relative to their combined spread,
#   d' = sqrt(2) * |mean_g - mean_i| / sqrt(sd_g^2 + sd_i^2)
dprime <- abs(genuine.mean - imposter.mean) * sqrt(2) /
  sqrt(genuine.sd^2 + imposter.sd^2)

D-prime is approximately 4.4899.

3) Detection Error Tradeoff (DET) Curve

# Detection Error Tradeoff: sweep a decision threshold over the scores and
# record both error rates at every step, then draw the whole curve once.
# (Calling plot() inside the sweep would start a new figure per threshold,
# each containing a single point.)
thresholds <- seq(0.00, 0.99, 0.02)

# FAR: fraction of imposter comparisons with score >= threshold.
# FRR: fraction of genuine comparisons with score < threshold.
# NA scores are not counted in the numerator but remain in the denominator.
# NOTE(review): the histogram labels these as distance scores; if a lower
# score means a better match, the two comparisons should be flipped - confirm.
FAR <- vapply(
  thresholds,
  function(threshold) {
    sum(imposter.scores$score >= threshold, na.rm = TRUE) /
      nrow(imposter.scores)
  },
  numeric(1)
)
FRR <- vapply(
  thresholds,
  function(threshold) {
    sum(genuine.scores$score < threshold, na.rm = TRUE) /
      nrow(genuine.scores)
  },
  numeric(1)
)

# One row per threshold, in the same order as `thresholds`.
DET <- data.frame(FAR, FRR)

plot(
  DET$FAR,
  DET$FRR,
  type = "b",
  xlim = range(0, FAR),
  ylim = range(0, FRR),
  xlab = "FAR",
  ylab = "FRR",
  main = "Detection Error Tradeoff"
)

Equal Error Rate